Superheroes and villains in comics can have a real impact on society. They are supposed to represent what is good and what is bad, so the way a character is portrayed influences the reader. If, for example, all villains belong to the same minority, readers may unconsciously come to see members of that minority as bad people in real life. Conversely, a character like Tony Stark could inspire people to study engineering. These are just examples illustrating the power comics can have on us. We can therefore study this choice of characters: how diverse it is, and whether there is a tendency towards a specific portrait for superheroes and villains.
# Import libraries
import pandas as pd
import numpy as np
%matplotlib inline
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import math
import re
import string
import pickle
import pyperclip
import plotly
import plotly.graph_objects as go
import chart_studio
import chart_studio.tools as tls
import chart_studio.plotly as py
from IPython.display import HTML
from plotly.subplots import make_subplots
import plotly.graph_objects as go
Loading the previously created dataframes for analysis
# Load the Marvel and DC character dataframes from their pickle files.
# NOTE(review): unpickling can execute arbitrary code - only load files you trust.
marvel_pers_final = pd.read_pickle("data_pickle/marvel_pers_final.txt")
dc_pers_final = pd.read_pickle('data_pickle/dc_pers_final.txt')
We drop the rows where Good_count = Bad_count = Neutral_count = 0, since nothing can be said about the overall behavior of such characters
# Index labels of the Marvel characters whose Good, Bad and Neutral counts are all 0:
# nothing can be said about their overall behavior
index_ = marvel_pers_final.index[
    marvel_pers_final[['Good_count', 'Bad_count', 'Neutral_count']].eq(0).all(axis=1)
]
# Remove those characters from the dataframe
marvel_pers_final.drop(index=index_, inplace=True)
# Renumber the remaining rows from 0
marvel_pers_final = marvel_pers_final.reset_index(drop=True)
marvel_pers_final.head(5)
We do the same for the DC dataframe
# Index labels of the DC characters whose Good, Bad and Neutral counts are all 0:
# nothing can be said about their overall behavior
index_ = dc_pers_final.index[
    dc_pers_final[['Good_count', 'Bad_count', 'Neutral_count']].eq(0).all(axis=1)
]
# Remove those characters from the dataframe
dc_pers_final.drop(index=index_, inplace=True)
# Renumber the remaining rows from 0
dc_pers_final = dc_pers_final.reset_index(drop=True)
dc_pers_final.head(5)
In the two dataframes, some values are written differently but mean the same thing; we therefore replace them in the DC dataframe with their equivalents from the Marvel dataframe
# DC uses 'Genderless' where Marvel uses 'Agender': align DC on the Marvel wording
dc_pers_final["Gender"] = dc_pers_final["Gender"].replace({'Genderless': 'Agender'})
# DC uses 'No Hair' where Marvel uses 'Bald': align DC on the Marvel wording
dc_pers_final["Hair"] = dc_pers_final["Hair"].replace({'No Hair': 'Bald'})
# Ordered rules used to collapse the free-text 'Education' column into a few labels.
# Each entry is (label, substrings); the substrings are searched in the lower-cased
# value and combined into a single regular expression (so the '.' of 'ph.d' matches
# any character, exactly as when each pattern is passed to str.contains on its own).
# Order matters: the labels are capitalised differently from the patterns, so a value
# that has already been relabelled is not matched again by a later rule - the first
# matching rule wins.
_EDUCATION_RULES = [
    ('Unknown', ['n/a', 'unknown', 'unrevealed', 'innaplicable']),
    ('Dropout', ['dropout', 'unfinished']),
    ('High School', ['high school']),
    ('Master', ['ms', 'master', 'masters']),
    ('Bachelor', ['ba']),
    ('PH.D', ['phd', 'doctor', 'ph.d', 'doctorate']),
    ('University', ['university']),
    ('College', ['college']),
    ('school', ['school']),
    ('AI', ['programmed', 'artificial']),
    ('Independent', ['self']),
    ('Trained', ['trained', 'training']),
    ('Institute', ['institute']),
    ('Advanced degree', ['degree']),
]


def _normalize_education(df):
    """
    Normalize the 'Education' column of df in place.

    The column is lower-cased, then every value containing one of the substrings of a
    rule in _EDUCATION_RULES is replaced by that rule's label. Values matching no rule
    keep their lower-cased text.
    df: Marvel or DC dataframe with an 'Education' column
    """
    df['Education'] = df['Education'].str.lower()
    for label, patterns in _EDUCATION_RULES:
        # na=False: missing values never match, instead of producing a NaN mask
        # that .loc would reject
        matches = df['Education'].str.contains('|'.join(patterns), na=False)
        df.loc[matches, 'Education'] = label


# Apply the same rules to both publishers so that their categories are comparable
_normalize_education(marvel_pers_final)
_normalize_education(dc_pers_final)
The goal of the following analysis is to compare the attributes of Marvel and DC comics characters with respect to their general behavior: Good, Bad or Neutral
First we display the columns of the two dataframes to show which attributes we can compare
# Show the column names of each dataframe to see which attributes can be compared
marvel_pers_final.columns
dc_pers_final.columns
There are categorical attributes
categorical attributes: ['Marital Status', 'Occupation', 'Education', 'Gender', 'Hair', 'Eyes', 'Place of Birth', 'Citizenship']
And also continuous attributes
continuous attributes: ['Weight in float', 'Height in float']
# Names of the columns holding categorical attributes
categorical = [
    'Marital Status',
    'Occupation',
    'Education',
    'Gender',
    'Hair',
    'Eyes',
    'Place of Birth',
    'Citizenship',
]
# Names of the columns holding continuous (numeric) attributes
continuous = [
    'Height in float',
    'Weight in float',
]
This color table is composed of the colors we are going to use through the notebook to differentiate between the Marvel and DC comics and also between the Bad, Neutral and Good Behavior. At the same time, we also create a Behavior table
# Bar colors, indexed as colors[publisher][behavior]:
# row 0 is Marvel, row 1 is DC; columns follow the order of `behaviors`
colors = [
    ["#98042D", "#BF4C41", "#FCC0C5"],  # Marvel: 'Bad', 'Neutral', 'Good'
    ["#0C2D48", "#2E8BC0", "#B1D4E0"],  # DC    : 'Bad', 'Neutral', 'Good'
]
# Behavior categories, in the order used by the color rows above
behaviors = [
    'Bad',
    'Neutral',
    'Good',
]
As we will do both a global and a temporal analysis, we first clean the years column
def clean_years(l):
    """
    Return the years of l that are strictly greater than 1930, in their original order.

    l: iterable of years, or None
    Returns an empty list when l is None.
    """
    return [] if l is None else [year for year in l if year > 1930]
# Keep only the years after 1930 for every character (see clean_years)
dc_pers_final['years'] = dc_pers_final['years'].apply(clean_years)
marvel_pers_final['years'] = marvel_pers_final['years'].apply(clean_years)
# Earliest remaining year of each character, or None when no year is left
dc_pers_final['First_apparition'] = dc_pers_final['years'].apply(
    lambda years: min(years, default=None)
)
marvel_pers_final['First_apparition'] = marvel_pers_final['years'].apply(
    lambda years: min(years, default=None)
)
def top_characteristics(dc_df, marvel_df, attribut = '', top=6):
    """
    Return the list of the top categories of an attribute, favouring the categories
    that are frequent in BOTH dataframes. It is called by Marvel_DC_GBN_dataframes()
    to get the categories on which the Good-Bad-Neutral dataframe is built.

    dc_df: DC dataframe
    marvel_df: Marvel dataframe
    attribut: column to rank, e.g. one of ['Citizenship', 'Marital Status', 'Occupation',
              'Education', 'Gender', 'Eyes', 'Hair', 'Place of Birth']
    top: maximum number of categories to return

    Each category gets, per publisher, a score equal to its rank by frequency
    (0 = most frequent, 'Unknown' excluded). The two scores are added and the
    categories with the lowest totals are returned, best first.
    """
    def _rank_categories(df):
        # value_counts() sorts by descending frequency, so once 'Unknown' is removed
        # the position of a category is its rank. Building the ranks directly avoids
        # relying on the column name produced by reset_index(), which differs between
        # pandas versions.
        counts = df[attribut].value_counts()
        counts = counts.drop('Unknown', errors='ignore')
        return pd.DataFrame({'Scores': list(range(len(counts)))}, index=counts.index)

    submarvel = _rank_categories(marvel_df)
    subdc = _rank_categories(dc_df)

    # Outer join: a category missing from one publisher gets a very large score there,
    # so that categories present in both dataframes are prioritized.
    dc_marvel = submarvel.join(subdc, how='outer', lsuffix='_Marvel', rsuffix='_DC').fillna(1e+6)
    dc_marvel['Scores'] = dc_marvel['Scores_Marvel'] + dc_marvel['Scores_DC']
    # Stable sort so that categories with equal scores come out in a deterministic order
    dc_marvel = dc_marvel.sort_values('Scores', ascending=True, kind='mergesort')

    return dc_marvel.index[:top].tolist()
We print below the lists of the global (i.e. over all years) most common attribute values in the DC and Marvel dataframes
# Each call below returns the list of the (at most) 6 top categories of one
# attribute, computed over all years by top_characteristics().
########## HAIR: TOP CATEGORIES #############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Hair', top=6)
########## EYES: TOP CATEGORIES #############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Eyes', top=6)
############ GENDER: TOP CATEGORIES ###########
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Gender', top=6)
############ OCCUPATION: TOP CATEGORIES ############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Occupation', top=6)
############## CITIZENSHIP: TOP CATEGORIES ###############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Citizenship', top=6)
############## MARITAL STATUS: TOP CATEGORIES ###############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Marital Status', top=6)
############## PLACE OF BIRTH: TOP CATEGORIES ###############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Place of Birth', top=6)
############## EDUCATION: TOP CATEGORIES ###############
top_characteristics(dc_pers_final, marvel_pers_final, attribut = 'Education', top=6)
def create_GBN_count_dataframe (dataframe, column_str=''):
    """
    Count, for every category of one attribute, how many rows of the dataframe have
    each behavior listed in the module-level `behaviors`.

    dataframe: Marvel or DC (sub)dataframe; must have a 'Behavior' column
    column_str: name of the attribute column to study

    Returns a dataframe indexed by category with one '<behavior>_count' column per
    behavior (0 where a category never occurs with that behavior), sorted by
    descending 'Good_count'. The 'Unknown' category is left out.
    Prints an error message and returns None when column_str is empty or is not a
    column of the dataframe.
    """
    # Guard clauses: a column must be given and must exist
    if column_str == '':
        print('Error: please select a column to study!\n')
        return None
    if column_str not in dataframe.columns:
        print('Error: {} column does not belong to the dataframe\n'.format(column_str))
        return None

    per_behavior = []
    for behavior in behaviors:
        # Category frequencies among the rows having this behavior
        rows = dataframe[dataframe['Behavior'] == behavior]
        counts = rows[column_str].value_counts()
        # Unknown values carry no information, leave them out
        if 'Unknown' in counts.index:
            counts = counts.drop('Unknown')
        per_behavior.append(counts.to_frame(behavior + '_count'))

    # Align the per-behavior counts on the category; absent combinations count as 0
    merged = pd.concat(per_behavior, axis=1, join='outer', sort=True).fillna(0)
    return merged.sort_values(by='Good_count', ascending=False)
def _restrict_to_years(df, wanted_years):
    """Return one row per (character URL, year) of df, limited to the wanted years."""
    # One row per year of activity; characters without any year are dropped, and a
    # character appearing several times in the same year is counted once.
    per_year = df.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
    return per_year[per_year['years'].isin(wanted_years)]


def Marvel_DC_GBN_dataframes(dc_df, marvel_df, charac='', top=6, year=False):
    """
    Build the Good-Bad-Neutral DC-Marvel dataframes used by
    DC_vs_Marvel_behavior_analysis(GBN_df_norm, GBN_df, ...) to generate the bar plot.

    dc_df : DC dataframe
    marvel_df : Marvel dataframe
    charac : characteristic (attribute column) to study
    top : number of top categories of the characteristic to keep
    year : a single year or an iterable of years to restrict the analysis to;
           any falsy value (default) means all years

    Returns (GBN_df, GBN_df_norm): the raw counts and the per-category proportions,
    with '_Marvel' / '_DC' suffixed columns. Both are empty dataframes when either
    input (after the optional year filter) is empty.
    """
    # If a year is specified, restrict the analysis to it
    if year:
        # Accept a single year as well as a collection of years
        if isinstance(year, (str, bytes)) or not hasattr(year, '__iter__'):
            wanted_years = [year]
        else:
            wanted_years = list(year)
        marvel_df = _restrict_to_years(marvel_df, wanted_years)
        dc_df = _restrict_to_years(dc_df, wanted_years)

    # Nothing to compare if one of the publishers has no row left
    if marvel_df.empty or dc_df.empty:
        return pd.DataFrame(), pd.DataFrame()

    # Top categories of the characteristic, shared by both publishers where possible
    top_categories = top_characteristics(dc_df, marvel_df, attribut=charac, top=top)

    # Keep only the characters belonging to one of these categories
    subdc = dc_df[dc_df[charac].isin(top_categories)]
    submarvel = marvel_df[marvel_df[charac].isin(top_categories)]

    # Good / Bad / Neutral counts per category, for each publisher
    GBN_submarvel = create_GBN_count_dataframe(submarvel, column_str=charac)
    GBN_subdc = create_GBN_count_dataframe(subdc, column_str=charac)

    # Join the two publishers on the category (the Marvel categories drive the join)
    GBN_df = GBN_submarvel.join(GBN_subdc, lsuffix='_Marvel', rsuffix='_DC').fillna(0)

    # Normalize each category so that its three behaviors sum to 1, then join again
    GBN_submarvel_norm = GBN_submarvel.div(GBN_submarvel.sum(axis=1), axis=0)
    GBN_subdc_norm = GBN_subdc.div(GBN_subdc.sum(axis=1), axis=0)
    GBN_df_norm = GBN_submarvel_norm.join(GBN_subdc_norm, lsuffix='_Marvel', rsuffix='_DC').fillna(0)

    return GBN_df, GBN_df_norm
The functions below adapt the number of x-axes, which may change with the number of attribute categories found
def _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, n_labelled):
    """
    Shared implementation of the update_layout_<N>Xaxis functions.

    Six x-axes are always configured, side by side. The first n_labelled ones are
    titled with the corresponding category of GBN_df_norm.index and use `visible`;
    the remaining ones get an empty title and are hidden.
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: width of one x-axis domain
    charac: attribute, used in the figure title
    visible: whether the labelled x-axes are visible
    n_labelled: number of x-axes that carry a category (1 to 6)
    """
    max_axes = 6
    axes = {}
    for k in range(max_axes):
        labelled = k < n_labelled
        # The last axis always extends to the right edge of the figure
        right_edge = 1 if k == max_axes - 1 else (k + 1) * sep
        props = dict(
            domain=[k * sep, right_edge],
            anchor='x' + str(k + 1),
            title_text='<b>' + GBN_df_norm.index[k] + '</b>' if labelled else '',
            visible=visible if labelled else False,
        )
        key = 'xaxis' if k == 0 else 'xaxis' + str(k + 1)
        # NOTE(review): the original code passed xaxis2 as a plain dict and every
        # other axis as a go.layout.XAxis object; plotly may handle the two forms
        # differently, so the distinction is kept as is - confirm whether intended.
        axes[key] = props if k == 1 else go.layout.XAxis(**props)

    fig.update_layout(
        barmode='stack',
        title=dict(text= charac +" Trend Marvel vs DC",
                   font= {'family':'Komika Hand',
                          'color':'#7f7f7f',
                          'size':20},
                   x=0.5,
                   xanchor='center',
                   y=0.95,
                   yanchor='top'),
        font=dict(family='Komika Hand',
                  size=10,
                  color="#7f7f7f"),
        yaxis_tickformat = '%',
        yaxis_title="<b>Proportion [%]</b>",
        **axes
    )


def update_layout_6Xaxis(fig, GBN_df_norm, sep, charac, visible=False):
    """
    Update the figure layout when 6 attribute categories are plotted.
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: separation between the different x-axes
    charac: attribute
    visible: Define if the x-axes are visible or not, default=False
    """
    _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, 6)


def update_layout_5Xaxis(fig, GBN_df_norm, sep, charac, visible=False):
    """
    Update the figure layout when 5 attribute categories are plotted (6th axis hidden).
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: separation between the different x-axes
    charac: attribute
    visible: Define if the x-axes are visible or not, default=False
    """
    _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, 5)


def update_layout_4Xaxis(fig, GBN_df_norm, sep, charac, visible=False):
    """
    Update the figure layout when 4 attribute categories are plotted (axes 5-6 hidden).
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: separation between the different x-axes
    charac: attribute
    visible: Define if the x-axes are visible or not, default=False
    """
    _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, 4)


def update_layout_3Xaxis(fig, GBN_df_norm, sep, charac, visible = False):
    """
    Update the figure layout when 3 attribute categories are plotted (axes 4-6 hidden).
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: separation between the different x-axes
    charac: attribute
    visible: Define if the x-axes are visible or not, default=False
    """
    _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, 3)


def update_layout_2Xaxis(fig, GBN_df_norm, sep, charac, visible = False):
    """
    Update the figure layout when 2 attribute categories are plotted (axes 3-6 hidden).
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: separation between the different x-axes
    charac: attribute
    visible: Define if the x-axes are visible or not, default=False
    """
    _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, 2)


def update_layout_1Xaxis(fig, GBN_df_norm, sep, charac, visible = False):
    """
    Update the figure layout when 1 attribute category is plotted (axes 2-6 hidden).
    fig: figure
    GBN_df_norm: Normalized Good-Bad-Neutral dataframe
    sep: separation between the different x-axes
    charac: attribute
    visible: Define if the x-axes are visible or not, default=False
    """
    _update_layout_xaxes(fig, GBN_df_norm, sep, charac, visible, 1)
def DC_vs_Marvel_behavior_analysis(GBN_df_norm, GBN_df, fig, charac='', visible_trace=False, visible_axis=False):
    """
    Add to fig a stacked (Good-Bad-Neutral) & grouped (Marvel - DC) bar plot for the
    categories of one characteristic, then configure the figure layout.

    GBN_df_norm : normalized GoodBadNeutral dataframe (bar heights)
    GBN_df : GoodBadNeutral dataframe (raw counts, shown as bar text)
    fig : figure the traces are added to
    charac : attribute, used in the figure title
    visible_trace: define if the traces are visible, default=False
    visible_axis: define if the x-axes are visible, default=False

    Returns the number of traces added to fig. Each category is drawn on its own
    x-axis ('x1', 'x2', ...); one Marvel and one DC trace are added per behavior.
    """
    # (publisher name, color row) in the order the traces are added
    publishers = (("Marvel", colors[0]), ("DC", colors[1]))

    number_traces = 0
    for i, categ in enumerate(GBN_df_norm.index.tolist()):
        # Only the traces of the first category appear in the legend, so that
        # each publisher/behavior pair is listed once
        show = i == 0
        for j, behavior in enumerate(behaviors):
            for publisher, palette in publishers:
                column = behavior + '_count_' + publisher
                fig.add_trace(go.Bar(
                    x=[publisher],
                    y=[GBN_df_norm[column][categ]],
                    text=[GBN_df[column][categ]],
                    textposition='auto',
                    name=behavior + ' ' + publisher,
                    visible=visible_trace,
                    xaxis='x' + str(i + 1),
                    marker_color=palette[j],
                    showlegend=show,
                    legendgroup=palette[j],
                ))
                number_traces += 1

    # The number of x-axes corresponds to the number of attribute categories
    number_xaxis = len(GBN_df_norm.index.tolist())
    top = 6
    sep = 1 / top

    # Pick the layout function matching the number of categories
    layout_updaters = {
        6: update_layout_6Xaxis,
        5: update_layout_5Xaxis,
        4: update_layout_4Xaxis,
        3: update_layout_3Xaxis,
        2: update_layout_2Xaxis,
        1: update_layout_1Xaxis,
    }
    update_layout = layout_updaters.get(number_xaxis)
    if update_layout is None:
        print('Nothing to be plotted!!\n')
    else:
        update_layout(fig, GBN_df_norm, sep, charac, visible_axis)

    return number_traces
We then plot the global trend of the different attributes. It is also possible to do it by year
def _plot_trend(charac, year=0):
    """
    Build and show the Marvel vs DC behavior bar plot of one attribute.

    charac: attribute column to plot, e.g. 'Hair'
    year: year passed on to Marvel_DC_GBN_dataframes(); a falsy value (default)
          plots the global trend over all years

    Returns (fig, GBN_df, GBN_df_norm) so that the caller can keep them.
    Prints a message instead of plotting when there is no data.
    """
    fig = go.Figure()
    GBN_df, GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac=charac, top=6, year=year)
    if GBN_df.empty:
        print('DataFrame is Empty, nothing can be plotted!')
    else:
        # Title prefix: 'Global' over all years, otherwise the selected year
        title = 'Global ' + charac if not year else str(year) + ' ' + charac
        DC_vs_Marvel_behavior_analysis(GBN_df_norm, GBN_df, fig, title, True, True)
        # NOTE(review): the figure is shown only when something was plotted;
        # confirm this matches the placement of fig.show() in the original cells.
        fig.show()
    return fig, GBN_df, GBN_df_norm


##################### HAIR TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Hair', year)
##################### EYES TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Eyes', year)
##################### Gender TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Gender', year)
##################### MARITAL STATUS TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Marital Status', year)
##################### PLACE OF BIRTH TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Place of Birth', year)
##################### EDUCATION TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Education', year)
##################### OCCUPATION TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Occupation', year)
##################### CITIZENSHIP TREND ######################
year = 0
fig, GBN_df, GBN_df_norm = _plot_trend('Citizenship', year)
############# Button Graph ############
############# Global Trend ############
# A single figure receives the bar traces of every characteristic. A dropdown
# button then selects which characteristic's traces are visible and relabels
# the six side-by-side x-axes with that characteristic's categories.
#Categories : ['Citizenship', 'Marital Status', 'Occupation', 'Education', 'Gender','Eyes', 'Hair', 'Place of Birth']
fig = go.Figure()

############# Number of traces = top * number_of_behavior (good, bad, neutral)
# Traces are appended to `fig` in the order of the calls below; each call
# returns how many traces it added, which is what the visibility masks rely on.
############# Hair (shown first, so its axes are created visible) ############
hair_GBN_df, hair_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Hair', top=6)
number_traces_hair = DC_vs_Marvel_behavior_analysis(hair_GBN_df_norm, hair_GBN_df, fig, 'Global Hair', visible_axis=True)
############# Marital Status ############
MS_GBN_df, MS_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Marital Status', top=6)
number_traces_MS = DC_vs_Marvel_behavior_analysis(MS_GBN_df_norm, MS_GBN_df, fig, 'Global Marital Status')
############# Citizenship ###############
City_GBN_df, City_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Citizenship', top=6)
number_traces_City = DC_vs_Marvel_behavior_analysis(City_GBN_df_norm, City_GBN_df, fig, 'Global Citizenship')
############# Eyes ###############
Eyes_GBN_df, Eyes_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Eyes', top=6)
number_traces_Eyes = DC_vs_Marvel_behavior_analysis(Eyes_GBN_df_norm, Eyes_GBN_df, fig, 'Global Eyes')
############# Education ###############
Edu_GBN_df, Edu_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Education', top=6)
number_traces_Edu = DC_vs_Marvel_behavior_analysis(Edu_GBN_df_norm, Edu_GBN_df, fig, 'Global Education')
############# Occupation ###############
Occ_GBN_df, Occ_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Occupation', top=6)
number_traces_Occ = DC_vs_Marvel_behavior_analysis(Occ_GBN_df_norm, Occ_GBN_df, fig, 'Global Occupation')
############# Place of Birth ###############
Birth_GBN_df, Birth_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Place of Birth', top=6)
number_traces_Birth = DC_vs_Marvel_behavior_analysis(Birth_GBN_df_norm, Birth_GBN_df, fig, 'Global Place of Birth')
############ Gender ##################
Gender_GBN_df, Gender_GBN_df_norm = Marvel_DC_GBN_dataframes(dc_pers_final, marvel_pers_final, charac='Gender', top=6)
number_traces_Gender = DC_vs_Marvel_behavior_analysis(Gender_GBN_df_norm, Gender_GBN_df, fig, 'Global Gender')

# Make the first traces (Hair) visible
for trace in fig.data[:number_traces_hair]:
    trace.visible = True

#### BUTTON ####
# create the filters


def _visibility_mask(trace_counts, selected):
    '''
    Return one boolean per trace of the figure: True for the traces of the
    characteristic at position `selected`, False for all the others.
    trace_counts: number of traces of each characteristic, in insertion order
    selected: position of the characteristic to show
    '''
    mask = []
    for position, count in enumerate(trace_counts):
        mask.extend([position == selected] * count)
    return mask


_global_trace_counts = [number_traces_hair, number_traces_MS, number_traces_City, number_traces_Eyes,
                        number_traces_Edu, number_traces_Occ, number_traces_Birth, number_traces_Gender]
visible_Hair = _visibility_mask(_global_trace_counts, 0)
visible_MS = _visibility_mask(_global_trace_counts, 1)
visible_City = _visibility_mask(_global_trace_counts, 2)
visible_Eyes = _visibility_mask(_global_trace_counts, 3)
visible_Edu = _visibility_mask(_global_trace_counts, 4)
visible_Occ = _visibility_mask(_global_trace_counts, 5)
visible_Birth = _visibility_mask(_global_trace_counts, 6)
visible_Gender = _visibility_mask(_global_trace_counts, 7)

# apply filters
top=6
sep = 1/top


def _button_xaxes(norm_df):
    '''
    Build the layout update that places the six x-axes side by side (each one
    `sep` wide, the last one ending at 1) and titles them, in bold, with the
    first six index labels of `norm_df`.
    '''
    layout = {}
    for k in range(6):
        left = k*sep if k else 0
        right = (k + 1)*sep if k < 5 else 1
        # The first axis is keyed 'xaxis', the following ones 'xaxis2' ... 'xaxis6'
        layout['xaxis' + (str(k + 1) if k else '')] = dict(
            domain=[left, right],
            anchor='x' + str(k + 1),
            title='<b>' + norm_df.index[k] + '</b>')
    return layout


# (button label, visibility mask, normalized dataframe providing the axis titles)
_dropdown_entries = [
    ("Hair", visible_Hair, hair_GBN_df_norm),
    ("Marital Status", visible_MS, MS_GBN_df_norm),
    ("Citizenship", visible_City, City_GBN_df_norm),
    ("Eyes", visible_Eyes, Eyes_GBN_df_norm),
    ("Education", visible_Edu, Edu_GBN_df_norm),
    ("Occupation", visible_Occ, Occ_GBN_df_norm),
    ("Place of Birth", visible_Birth, Birth_GBN_df_norm),
    ("Gender", visible_Gender, Gender_GBN_df_norm),
]
# NOTE(review): Plotly's "update" method is documented as taking a data update,
# a layout update and optionally trace indices; the third element below may
# therefore not be applied as a layout change — confirm.
_dropdown_buttons = [
    dict(label=label,
         method="update",
         args=[{"visible": mask},
               _button_xaxes(norm_df),
               {'yaxis': {'title': 'Proportion %'}}])
    for label, mask, norm_df in _dropdown_entries
]
fig.update_layout(
    updatemenus=[
        #category button
        go.layout.Updatemenu(
            active=0,
            pad={"r": 10, "t": 10},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=_dropdown_buttons,
        ),
    ])
# Initial axes correspond to the Hair categories, matching the visible traces
update_layout_6Xaxis(fig, hair_GBN_df_norm, sep, charac="Global", visible=True)
fig.show()

# This part saves the plot to Chart Studio
# NOTE(review): the account name and API key are hard-coded in the notebook;
# consider rotating the key and loading it from outside the source.
chart_studio.tools.set_credentials_file(username='schmider', api_key='OGpZh1yfEHe4hFbVMaPO')
url = py.plot(fig, filename='GlobalTrend_Marvel_vs_DC.html', auto_open=False,)
print(url)
# Pass the above url to get_embed, and give the output to antoine in order to put it on the website.
tls.get_embed(url)
# Also copy the figure, rendered as an HTML <div>, to the clipboard
pyperclip.copy(str(plotly.offline.plot(fig, include_plotlyjs=False, output_type='div')))
def _slider_xaxes(categories, sep, top=6):
    '''
    Build the layout update describing the `top` side-by-side x-axes of one
    slider step.
    categories: category labels of the step; the first len(categories) axes are
                titled with them (in bold), the remaining axes get an empty
                title and are hidden
    sep: width of one axis domain (the last axis always ends at 1)
    top: total number of x-axes
    '''
    layout = {}
    for k in range(top):
        axis = dict(
            domain=[k*sep if k else 0, (k + 1)*sep if k < top - 1 else 1],
            anchor='x' + str(k + 1))
        if k < len(categories):
            axis['title'] = '<b>' + categories[k] + '</b>'
        else:
            axis['title'] = ''
            axis['visible'] = False
        # The first axis is keyed 'xaxis', the following ones 'xaxis2', 'xaxis3', ...
        layout['xaxis' + (str(k + 1) if k else '')] = axis
    return layout


def Temporal_Slider_Marvel_vs_DC_GBN(dc_df, marvel_df, charac=''):
    '''
    This function builds, shows and uploads a Marvel vs DC bar figure with a
    slider over 5-year periods (1929-1933, 1934-1938, ...) for one characteristic.
    Periods for which Marvel_DC_GBN_dataframes() returns an empty dataframe are
    skipped. If every period is empty, a message is printed and nothing is plotted.
    dc_df : DC dataframe
    marvel_df : Marvel dataframe
    charac : characteristic to study (also used in the title and in the file name)
    '''
    #create figure
    fig = go.Figure()
    top = 6
    sep = 1/top
    list_num_traces = []       # number of traces added for each kept period
    list_year = []             # slider label of each kept period
    x_axis = []                # x-axes layout update of each kept period
    first_GBN_df_norm = None   # normalized dataframe of the first kept period

    #Along the time
    for year in range(1929, 2020, 5):
        #take statistics over the 5 years of the period
        GBN_df, GBN_df_norm = Marvel_DC_GBN_dataframes(dc_df, marvel_df, charac=charac, top=top,
                                                       year=list(range(year, year + 5)))
        if GBN_df.index.empty:
            continue
        num_traces = DC_vs_Marvel_behavior_analysis(GBN_df_norm, GBN_df, fig, charac)
        if first_GBN_df_norm is None:
            first_GBN_df_norm = GBN_df_norm
        list_year.append(str(year)+'-'+str(year+4))
        list_num_traces.append(num_traces)
        x_axis.append(_slider_xaxes(GBN_df_norm.index.tolist(), sep, top))

    # Without any period there is no trace to show and no slider step to build
    if not list_num_traces:
        print('DataFrame is Empty, nothing can be plotted!')
        return

    # Make the first traces visible
    for trace in fig.data[:list_num_traces[0]]:
        trace.visible = True

    # Create and add slider: each step shows only the traces of its own period.
    # Traces were appended period after period, so a running offset locates them.
    # NOTE(review): Plotly's "update" method is documented as taking a data
    # update, a layout update and optionally trace indices; the third element
    # of `args` may therefore not be applied as a layout change — confirm.
    steps = []
    offset = 0
    total_traces = len(fig.data)
    for label, num_traces, xaxes in zip(list_year, list_num_traces, x_axis):
        mask = [False] * total_traces
        mask[offset:offset + num_traces] = [True] * num_traces
        steps.append(dict(
            method="update",
            args=[{"visible": mask},
                  xaxes,
                  {'yaxis': {'title': 'Proportion %'}}],
            label=label,
        ))
        offset += num_traces

    sliders = [dict(
        active=0,
        yanchor='top',
        xanchor='left',
        currentvalue={
            'font': {'size': 10},
            'prefix': 'Year:',
            'visible': False,
            'xanchor': 'left',
        },
        pad= {'b': 10, 't': 50},
        y= 0,
        steps=steps,
    )]

    #Update the layout for the first period, according to its number of categories
    number_first = len(first_GBN_df_norm.index)
    if number_first == 6:
        update_layout_6Xaxis(fig, first_GBN_df_norm, sep, charac, True)
    elif number_first == 5:
        update_layout_5Xaxis(fig, first_GBN_df_norm, sep, charac, True)
    elif number_first == 4:
        update_layout_4Xaxis(fig, first_GBN_df_norm, sep, charac, True)
    elif number_first == 3:
        update_layout_3Xaxis(fig, first_GBN_df_norm, sep, charac, True)
    elif number_first == 2:
        update_layout_2Xaxis(fig, first_GBN_df_norm, sep, charac, True)
    elif number_first == 1:
        update_layout_1Xaxis(fig, first_GBN_df_norm, sep, charac, True)

    fig.update_layout(title=dict(text="Temporal " + charac + " Trend Marvel vs DC",
                                 font= {'family':'Komika Hand',
                                        'color':'#7f7f7f',
                                        'size':20},
                                 x=0.5,
                                 xanchor='center',
                                 y=0.95,
                                 yanchor='top'),
                      sliders=sliders,
                      )
    fig.show()

    #This part saves the plot to Chart Studio
    # NOTE(review): the account name and API key are hard-coded in the notebook;
    # consider rotating the key and loading it from outside the source.
    chart_studio.tools.set_credentials_file(username='pgaudill', api_key='PWDouWI3xODknjWdC0cG')
    url = py.plot(fig, filename='Temporal'+charac+'Trend_Marvel_vs_DC.html', auto_open=False,)
    print(url)
    # Copy the above url in the get_embed method, and give the output to Ahmed in order to put it on the website.
    tls.get_embed(url)
# Build, show and upload one temporal slider figure per characteristic, in this order
for charac_name in ('Occupation', 'Gender', 'Hair', 'Eyes',
                    'Marital Status', 'Education', 'Citizenship', 'Place of Birth'):
    Temporal_Slider_Marvel_vs_DC_GBN(dc_pers_final, marvel_pers_final, charac=charac_name)
We will now perform the same analysis, this time based on the fame of the characters.
# Load the pickled Marvel and DC dataframes used for the fame analysis
marvel_df_fame = pd.read_pickle("data_pickle/marvel_longevity")
dc_df_fame = pd.read_pickle("data_pickle/dc_longevity")
We create a set of colors to indicate the fame of the Marvel and DC characters. The colors are close to the ones chosen to distinguish the behaviors, because the principle is the same and it makes sense for the comparison between Marvel and DC.
# Fame levels, in the order used to index each palette below
fame = ['Forgotten', 'Intermediate', 'Famous']
# One color per fame level ('Forgotten', 'Intermediate', 'Famous') for each universe
_marvel_fame_palette = ["#98042D", "#BF4C41", "#FCC0C5"]
_dc_fame_palette = ["#0C2D48", "#2E8BC0", "#B1D4E0"]
# colors_fame[0] is the Marvel palette, colors_fame[1] the DC palette
colors_fame = [_marvel_fame_palette, _dc_fame_palette]
Again, we clean the Education attribute of the DC and Marvel dataframes, since its raw values are too varied.
# Ordered (pattern, label) rules used to collapse the free-text Education values.
# The rules are applied one after the other to the already rewritten column, so
# their order matters. The patterns are passed unchanged to str.contains.
# NOTE(review): str.contains treats the pattern as a regular expression by
# default, so the '.' of 'ph.d' matches any character — confirm this is intended.
_education_rules = [
    ('n/a', 'Unknown'),
    ('unknown', 'Unknown'),
    ('unrevealed', 'Unknown'),
    ('innaplicable', 'Unknown'),
    ('dropout', 'Dropout'),
    ('unfinished', 'Dropout'),
    ('high school', 'High School'),
    ('ms', 'Master'),
    ('master', 'Master'),
    ('masters', 'Master'),
    ('ba', 'Bachelor'),
    ('phd', 'PH.D'),
    ('doctor', 'PH.D'),
    ('ph.d', 'PH.D'),
    ('doctorate', 'PH.D'),
    ('university', 'University'),
    ('college', 'College'),
    ('school', 'school'),
    ('programmed', 'AI'),
    ('artificial', 'AI'),
    ('self', 'Independent'),
    ('trained', 'Trained'),
    ('training', 'Trained'),
    ('institute', 'Institute'),
    ('degree', 'Advanced degree'),
]


def _clean_education(df):
    '''
    Lower-case the Education column of `df`, then replace every value that
    contains one of the rule patterns by the corresponding label.
    The dataframe is modified in place.
    '''
    df['Education'] = df['Education'].str.lower()
    for pattern, label in _education_rules:
        df.loc[df['Education'].str.contains(pattern), 'Education'] = label


# Same cleaning for both universes, DC first
_clean_education(dc_df_fame)
_clean_education(marvel_df_fame)
We rename the `Years` column to `years` so that it can be used by the following functions.
# The functions below read the column as 'years' (lower case)
_years_renaming = {"Years": "years"}
dc_df_fame = dc_df_fame.rename(columns=_years_renaming)
marvel_df_fame = marvel_df_fame.rename(columns=_years_renaming)
def create_Fame_count_dataframe(dataframe, column_str='', fame_levels=None):
    '''
    This function creates a dataframe counting, for each category of an attribute,
    the number of rows of each fame level: Forgotten, Intermediate, Famous.
    It is called by Marvel_DC_Fame_dataframes() that has extracted the top attribute categories
    dataframe: Marvel or DC subdataframe (sampled on categories and possibly on particular years);
               its 'Famous' column holds the fame level of each row
    column_str: attribute (column) to study
    fame_levels: ordered fame levels to count; defaults to the module-level `fame` list
    Returns a dataframe indexed by category with one '<level>_count' column per fame
    level, sorted by decreasing count of the last level ('Famous_count' by default).
    The 'Unknown' category is dropped. Prints an error and returns None when
    column_str is empty or is not a column of the dataframe.
    '''
    #If column_str is empty, report the error
    if column_str == '':
        print('Error: please select a column to study!\n')
        return
    #check if column belongs to dataframe
    if column_str not in dataframe.columns:
        print('Error: {} column does not belong to the dataframe\n'.format(column_str))
        return
    # Iterate over the fame levels themselves (and not over the length of the
    # unrelated `behaviors` list, which only worked because both have 3 items)
    if fame_levels is None:
        fame_levels = fame
    #Create one single-column dataframe for each fame level
    dfList = []
    for level in fame_levels:
        #Value count on the column of interest, restricted to this fame level
        counts = dataframe.loc[dataframe['Famous'] == level, column_str].value_counts()
        #Drop the unknown values, if any
        counts = counts.drop('Unknown', errors='ignore')
        dfList.append(counts.to_frame(name=level + '_count'))
    #Outer join of the created dataframes on the categories. Fill NaN values with 0
    df = pd.concat(dfList, axis=1, join='outer', sort=True).fillna(0)
    return df.sort_values(by=fame_levels[-1] + '_count', ascending=False)
def Marvel_DC_Fame_dataframes(dc_df, marvel_df, charac='', top=6, year=False):
    '''
    This function builds the Forgotten-Intermediate-Famous DC-Marvel Dataframe in order to generate the bar plot with
    DC_vs_Marvel_Fame_analysis() function
    dc_df : DC dataframe
    marvel_df : Marvel dataframe
    charac : characteristic
    top : the top subcategories of the characteristic we want to keep
    year : a single year or a collection of years to restrict the analysis to;
           any falsy value (the default) keeps all the years
    Returns (Fame_df, Fame_df_norm): the counts and the row-normalized counts, with
    '_Marvel' / '_DC' suffixed columns. Two empty dataframes are returned when
    either universe has no row left after the year filter.
    '''
    #If years are specified, we restrict the analysis to them
    if year:
        # Accept a single year as well as a collection of years
        wanted_years = [year] if np.isscalar(year) else list(year)

        def _restrict_to_years(df):
            # One row per (character, year): we drop the duplicates so that we don't take
            # into account the characters that appear several times in one year
            df = df.explode('years').dropna(subset=['years']).drop_duplicates(subset=['years', 'URL'])
            return df[df['years'].isin(wanted_years)]

        marvel_df = _restrict_to_years(marvel_df)
        dc_df = _restrict_to_years(dc_df)

    #If the dataframes are empty, we return empty dataframes
    if marvel_df.empty or dc_df.empty:
        return pd.DataFrame(), pd.DataFrame()

    # Top subcategories of the characteristic (only membership tests are done on the result)
    top_categories = top_characteristics(dc_df, marvel_df, attribut=charac, top=top)
    # Sample Marvel and DC dataframe with these subcategories
    subdc = dc_df[dc_df[charac].apply(lambda x: x in top_categories)]
    submarvel = marvel_df[marvel_df[charac].apply(lambda x: x in top_categories)]
    # Create a Marvel and DC dataframe based on the counts of Forgotten-Intermediate-Famous
    Fame_submarvel = create_Fame_count_dataframe(submarvel, column_str=charac)
    Fame_subdc = create_Fame_count_dataframe(subdc, column_str=charac)
    #Join the two generated dataframes on index=top subcategories
    # NOTE(review): join() is a left join by default, so a subcategory that is
    # absent from the Marvel counts is dropped — confirm this is intended.
    Fame_df = Fame_submarvel.join(Fame_subdc, lsuffix='_Marvel', rsuffix='_DC').fillna(0)
    #Normalize each row of the Marvel and DC count dataframes and join them again
    Fame_submarvel_norm = Fame_submarvel.div(Fame_submarvel.sum(axis=1), axis=0)
    Fame_subdc_norm = Fame_subdc.div(Fame_subdc.sum(axis=1), axis=0)
    Fame_df_norm = Fame_submarvel_norm.join(Fame_subdc_norm, lsuffix='_Marvel', rsuffix='_DC').fillna(0)
    return Fame_df, Fame_df_norm
def DC_vs_Marvel_Fame_analysis(Fame_df_norm, Fame_df, fig, charac='', visible_trace=False, visible_axis=False):
    '''
    This function takes a dataframe built on the Forgotten, Intermediate, Famous counts for a certain
    characteristic, for both Marvel and DC; and the same dataframe, normalized. It adds to the figure the
    bar traces ('Forgotten', 'Intermediate', 'Famous' for Marvel and for DC) of the different
    sub-categories of the characteristic, one x-axis per sub-category, and updates the layout accordingly
    Fame_df_norm : normalized Fame dataframe (gives the bar heights)
    Fame_df : Fame dataframe (gives the text written on the bars)
    fig : figure the traces are added to
    charac : attribute, forwarded to the update_layout_*Xaxis() functions
    visible_trace: define if the trace is visible, default=False
    visible_axis: define if the x_axis is visible, default=False
    Returns the number of traces added to the figure.
    '''
    categories = Fame_df_norm.index.tolist()
    # (bar label, suffix of the count columns, palette) for each universe, Marvel first
    universes = (("Marvel", '_count_Marvel', colors_fame[0]),
                 ("DC", '_count_DC', colors_fame[1]))
    number_traces = 0
    for i, categ in enumerate(categories):
        for j, fame_ in enumerate(fame):
            for universe, suffix, palette in universes:
                column = fame_ + suffix
                fig.add_trace(go.Bar(
                    x=[universe],
                    y=[Fame_df_norm[column][categ]],
                    text=[Fame_df[column][categ]],
                    textposition='auto',
                    name=fame_ + ' ' + universe,
                    visible=visible_trace,
                    xaxis='x' + str(i + 1),
                    marker_color=[palette[j]],
                    # Only the traces of the first category get a legend entry
                    showlegend=(i == 0),
                    legendgroup=palette[j],
                ))
                number_traces += 1
    #The number of x-axes corresponds to the number of attribute categories
    number_xaxis = len(categories)
    top=6
    sep=1/top
    #Check the number of categories and adapt the layout respectively
    if number_xaxis == 6:
        update_layout_6Xaxis(fig, Fame_df_norm, sep, charac, visible_axis)
    elif number_xaxis == 5:
        update_layout_5Xaxis(fig, Fame_df_norm, sep, charac, visible_axis)
    elif number_xaxis == 4:
        update_layout_4Xaxis(fig, Fame_df_norm, sep, charac, visible_axis)
    elif number_xaxis == 3:
        update_layout_3Xaxis(fig, Fame_df_norm, sep, charac, visible_axis)
    elif number_xaxis == 2:
        update_layout_2Xaxis(fig, Fame_df_norm, sep, charac, visible_axis)
    elif number_xaxis == 1:
        update_layout_1Xaxis(fig, Fame_df_norm, sep, charac, visible_axis)
    else:
        print('Nothing to be plotted!!\n')
    return number_traces
############# Button Graph ############
############# Global Trend ############
#Categories : ['Citizenship', 'Marital Status', 'Occupation', 'Education', 'Gender','Eyes', 'Hair', 'Place of Birth']
fig = go.Figure()
############# Number of traces = top * Forgotten-Intermediate-Famous
############# Hair: 18 traces ############
hair_Fame_df, hair_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Hair', top=6)
number_traces_hair = DC_vs_Marvel_Fame_analysis(hair_Fame_df_norm, hair_Fame_df, fig, 'Global Hair', visible_axis=True)
############# Marital Status: 18 traces ############
MS_Fame_df, MS_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Marital Status', top=6)
number_traces_MS = DC_vs_Marvel_Fame_analysis(MS_Fame_df_norm, MS_Fame_df, fig, 'Global Marital Status')
############# Citizenship: 18 traces ###############
City_Fame_df, City_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Citizenship', top=6)
number_traces_City = DC_vs_Marvel_Fame_analysis(City_Fame_df_norm, City_Fame_df, fig, 'Global Citizenship')
############# Eyes: 18 traces ###############
Eyes_Fame_df, Eyes_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Eyes', top=6)
number_traces_Eyes = DC_vs_Marvel_Fame_analysis(Eyes_Fame_df_norm, Eyes_Fame_df, fig, 'Global Eyes')
############# Education: 18 traces ###############
Edu_Fame_df, Edu_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Education', top=6)
number_traces_Edu = DC_vs_Marvel_Fame_analysis(Edu_Fame_df_norm, Edu_Fame_df, fig, 'Global Education')
############# Occupation: 18 traces ###############
Occ_Fame_df, Occ_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Occupation', top=6)
number_traces_Occ = DC_vs_Marvel_Fame_analysis(Occ_Fame_df_norm, Occ_Fame_df, fig, 'Global Occupation')
############# Place of Birth: 18 traces ###############
Birth_Fame_df, Birth_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Place of Birth', top=6)
number_traces_Birth = DC_vs_Marvel_Fame_analysis(Birth_Fame_df_norm, Birth_Fame_df, fig, 'Global Place of Birth')
############ Gender : 18 traces ##################
Gender_Fame_df, Gender_Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df_fame, marvel_df_fame, charac='Gender', top=6)
number_traces_Gender = DC_vs_Marvel_Fame_analysis(Gender_Fame_df_norm, Gender_Fame_df, fig, 'Global Gender')
# Make the first traces visible
for i in range(number_traces_hair):
    fig.data[i].visible = True

#### BUTTON ####
# Number of traces per characteristic, listed in the order in which the
# traces were added to the figure.
_fame_trace_counts = (
    number_traces_hair,
    number_traces_MS,
    number_traces_City,
    number_traces_Eyes,
    number_traces_Edu,
    number_traces_Occ,
    number_traces_Birth,
    number_traces_Gender,
)


def _fame_visibility_filter(selected):
    """Return one boolean per trace: True only for the traces of the
    characteristic at position ``selected`` in ``_fame_trace_counts``."""
    flags = []
    for position, count in enumerate(_fame_trace_counts):
        flags.extend([position == selected] * count)
    return flags


# create the filters (one mask per dropdown entry)
(visible_Hair, visible_MS, visible_City, visible_Eyes,
 visible_Edu, visible_Occ, visible_Birth, visible_Gender) = map(
    _fame_visibility_filter, range(len(_fame_trace_counts)))
# apply filters
top=6
sep = 1/top


def _global_fame_xaxes(labels, shown, sep, top=6):
    """Build the layout part of a dropdown button: one entry per x-axis.

    The plot is split into ``top`` side-by-side x-axes of width ``sep``.
    The first ``shown`` axes are titled with the bold category name taken
    from ``labels``; any remaining axis gets an empty title and is hidden.

    Parameters
    ----------
    labels : indexable of str
        Category names (here the index of a normalised fame dataframe).
    shown : int
        Number of leading axes that display a category.
    sep : float
        Width of one axis in paper coordinates.
    top : int
        Total number of x-axes in the figure.

    Returns
    -------
    dict
        Maps 'xaxis', 'xaxis2', ... to their domain/anchor/title settings.
    """
    axes = {}
    for slot in range(top):
        # Keep the exact outer bounds 0 and 1 instead of computed floats.
        lower = slot * sep if slot else 0
        upper = (slot + 1) * sep if slot < top - 1 else 1
        axis = dict(domain=[lower, upper], anchor='x' + str(slot + 1))
        if slot < shown:
            axis['title'] = '<b>' + labels[slot] + '</b>'
        else:
            axis['title'] = ''
            axis['visible'] = False
        # plotly names the first axis 'xaxis' and the following ones 'xaxisN'.
        axes['xaxis' if slot == 0 else 'xaxis' + str(slot + 1)] = axis
    return axes


# (button label, trace visibility mask, category labels, number of titled axes)
_fame_buttons = [
    ("Hair", visible_Hair, hair_Fame_df_norm.index, top),
    ("Marital Status", visible_MS, MS_Fame_df_norm.index, top),
    ("Citizenship", visible_City, City_Fame_df_norm.index, top),
    ("Eyes", visible_Eyes, Eyes_Fame_df_norm.index, top),
    ("Education", visible_Edu, Edu_Fame_df_norm.index, top),
    ("Occupation", visible_Occ, Occ_Fame_df_norm.index, top),
    ("Place of Birth", visible_Birth, Birth_Fame_df_norm.index, top),
    # Gender only uses five axes; the sixth one is blanked and hidden.
    # (The original spelled the key 'vivible', so the axis was never hidden.)
    ("Gender", visible_Gender, Gender_Fame_df_norm.index, top - 1),
]

fig.update_layout(
    updatemenus=[
        #category button
        go.layout.Updatemenu(
            active=0,
            pad={"r": 10, "t": 10},
            x=-0.22,
            y=1.15,
            xanchor='left',
            yanchor='top',
            buttons=[
                dict(label=button_label,
                     method="update",
                     # NOTE(review): plotly's "update" method documents its args as
                     # [data changes, layout changes, trace indices]; the y-axis
                     # title dict sits in the third slot -- confirm it has the
                     # intended effect.
                     args=[{"visible": button_mask},
                           _global_fame_xaxes(button_labels, button_shown, sep, top),
                           {'yaxis': {'title': 'Proportion %'}}
                           ])
                for button_label, button_mask, button_labels, button_shown in _fame_buttons
            ],
        ),
    ])
# Apply the initial six-axis layout using the Hair categories, which are the
# traces made visible by default (helper defined elsewhere in this notebook).
update_layout_6Xaxis(fig, hair_Fame_df_norm, sep, charac="Global", visible=True)
fig.show()
# Publish the figure through chart_studio and print the URL it returns.
# NOTE(review): the account name and API key are hard-coded here; anyone with
# access to this notebook can read them. Consider revoking this key and loading
# credentials from the environment or the local chart_studio credentials file.
chart_studio.tools.set_credentials_file(username='pgaudill', api_key='PWDouWI3xODknjWdC0cG')
url = py.plot(fig, filename='Fame_GlobalTrend_Marvel_vs_DC.html', auto_open=False,)
print(url)
# get_embed(url) produces the embed snippet for the website; as the last
# expression of the cell, its value is what the notebook displays.
tls.get_embed(url)
def _temporal_xaxis_layout(labels, sep, top=6):
    """Return the layout-update dict for one slider step.

    The figure has ``top`` side-by-side x-axes of width ``sep``. Axes that
    have a category in ``labels`` get it as a bold title; the remaining axes
    get an empty title and ``visible=False``.
    """
    n_labels = len(labels)
    layout = {}
    for slot in range(top):
        # Keep the exact outer bounds 0 and 1 instead of computed floats.
        lower = slot * sep if slot else 0
        upper = (slot + 1) * sep if slot < top - 1 else 1
        axis = dict(domain=[lower, upper], anchor='x' + str(slot + 1))
        if slot < n_labels:
            axis['title'] = '<b>' + labels[slot] + '</b>'
        else:
            axis['title'] = ''
            axis['visible'] = False
        # plotly names the first axis 'xaxis' and the following ones 'xaxisN'.
        layout['xaxis' if slot == 0 else 'xaxis' + str(slot + 1)] = axis
    return layout


def Temporal_Slider_Marvel_vs_DC_Fame(dc_df, marvel_df, charac=''):
    """Plot the Marvel vs DC fame trend of one characteristic with a time slider.

    The years 1929-2019 are walked in 5-year windows. For every window that
    yields data, ``Marvel_DC_Fame_dataframes`` computes the top-6 statistics
    and ``DC_vs_Marvel_Fame_analysis`` adds the matching traces to a single
    figure. A slider step per window toggles the visibility of its traces and
    retitles the six x-axes. The figure is shown, uploaded through
    chart_studio, and the resulting URL and embed snippet are printed.

    Parameters
    ----------
    dc_df, marvel_df
        Forwarded unchanged to ``Marvel_DC_Fame_dataframes``.
    charac : str
        Name of the characteristic (e.g. 'Gender'); also used in the figure
        title and in the uploaded file name.

    Returns
    -------
    None
        If no window contains data, a message is printed and nothing is plotted.
    """
    #create figure
    fig = go.Figure()
    top = 6
    sep = 1/top
    list_num_traces = []   # number of traces added for each kept window
    list_year = []         # slider label of each kept window
    x_axis = []            # x-axis layout update of each kept window
    first_Fame_df_norm = None

    #Along the time
    for year in range(1929, 2020, 5):
        #take statistics
        Fame_df, Fame_df_norm = Marvel_DC_Fame_dataframes(dc_df, marvel_df, charac=charac, top=top,
                                                          year=[year, year+1, year+2, year+3, year+4])
        if Fame_df.index.empty:
            continue
        num_traces = DC_vs_Marvel_Fame_analysis(Fame_df_norm, Fame_df, fig, charac)
        list_year.append(str(year)+'-'+str(year+4))
        if first_Fame_df_norm is None:
            first_Fame_df_norm = Fame_df_norm
        list_num_traces.append(num_traces)
        # Always append exactly one layout per kept window so that x_axis stays
        # aligned with list_num_traces and list_year.
        x_axis.append(_temporal_xaxis_layout(Fame_df_norm.index, sep, top))

    # Nothing to plot: avoid indexing into empty lists below.
    if not list_num_traces:
        print("No data available for characteristic '" + charac + "'")
        return

    # Make the first traces visible
    for i in range(list_num_traces[0]):
        fig.data[i].visible = True

    # Create and add slider
    steps = []
    total_traces = len(fig.data)
    offset = 0   # index in fig.data of the first trace of the current window
    for label, num_traces, axes_update in zip(list_year, list_num_traces, x_axis):
        shown = [False] * total_traces
        for k in range(offset, offset + num_traces):
            shown[k] = True
        offset += num_traces
        steps.append(dict(
            method="update",
            # NOTE(review): plotly's "update" method documents its args as
            # [data changes, layout changes, trace indices]; the y-axis title
            # dict sits in the third slot -- confirm it has the intended effect.
            args=[{"visible": shown},
                  axes_update,
                  {'yaxis': {'title': 'Proportion %'}}],
            label=label,
        ))

    sliders = [dict(
        active=0,
        yanchor='top',
        xanchor='left',
        currentvalue={
            'font': {'size': 10},
            'prefix': 'Year:',
            'visible': False,
            'xanchor': 'left',
        },
        pad= {'b': 10, 't': 50},
        y= 0,
        steps=steps,
    )]

    #Update first trace: initial layout matching the number of categories of
    #the first window (helpers are defined elsewhere in this notebook)
    n_first = len(first_Fame_df_norm.index)
    if n_first == 6:
        update_layout_6Xaxis(fig, first_Fame_df_norm, sep, charac, True)
    elif n_first == 5:
        update_layout_5Xaxis(fig, first_Fame_df_norm, sep, charac, True)
    elif n_first == 4:
        update_layout_4Xaxis(fig, first_Fame_df_norm, sep, charac, True)
    elif n_first == 3:
        update_layout_3Xaxis(fig, first_Fame_df_norm, sep, charac, True)
    elif n_first == 2:
        update_layout_2Xaxis(fig, first_Fame_df_norm, sep, charac, True)
    elif n_first == 1:
        update_layout_1Xaxis(fig, first_Fame_df_norm, sep, charac, True)

    fig.update_layout(title=dict(text="Fame : Temporal " + charac + " Trend Marvel vs DC",
                                 font= {'family':'Komika Hand',
                                        'color':'#7f7f7f',
                                        'size':20},
                                 x=0.5,
                                 xanchor='center',
                                 y=0.95,
                                 yanchor='top'),
                      sliders=sliders,
                      )
    fig.show()

    #This part saves the plot
    # NOTE(review): the account name and API key are hard-coded; consider
    # revoking this key and loading credentials from the environment instead.
    chart_studio.tools.set_credentials_file(username='pgaudill', api_key='PWDouWI3xODknjWdC0cG')
    url = py.plot(fig, filename='FameTemporal'+charac+'Trend_Marvel_vs_DC.html', auto_open=False,)
    print(url)
    # The embed snippet is what goes on the website. Inside a function a bare
    # expression is not echoed by the notebook, so print it explicitly.
    print(tls.get_embed(url))
# Build, show and upload one temporal slider plot per characteristic,
# in the same order as before.
for fame_charac in ('Occupation', 'Education', 'Place of Birth', 'Marital Status',
                    'Gender', 'Eyes', 'Hair', 'Citizenship'):
    Temporal_Slider_Marvel_vs_DC_Fame(dc_df_fame, marvel_df_fame, charac=fame_charac)